Wykorzystane biblioteki:

  • readxl - wczytuje plik xlsx,
  • dplyr, tidyr, lubridate, tibble, zoo - manipuluje danymi,
  • ggplot2, lattice, plotly, rmarkdown - wizualizacja danych,
  • DT - tworzy estetyczne tabele,
  • ggcorrplot - wizualizuje graficznie korelacje,
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(DT)
library(lattice)
library(plotly)
library(lubridate)
library(tibble)
library(rmarkdown)
library(zoo)
library(ggcorrplot)
library(caret)

1 Dane

1.1 Podsumowanie danych

Poniższy blok kodu wczytuje dane:

  • goldPrice - ceny złota,
  • currencyExchangeRates - kursy wymiany walut,
  • spComposite - indeks giełdowy amerykańskich akcji firmy Standard & Poor’s,
  • worldDevelopmentIndicators - światowe wskaźniki rozwoju,
setwd("D:\\studia\\ZED\\projekt\\Data pack\\")
goldPrice <- as_tibble(read.csv(file = "Gold prices.csv"))
currencyExchangeRates <-  as_tibble(read.csv(file = "CurrencyExchangeRates.csv"))
spComposite <-  as_tibble(read.csv(file = "S&P Composite.csv"))
worldDevelopmentIndicators <- as_tibble(read_excel("World_Development_Indicators.xlsx"))

Poniższy blok kodu wczytuje dane odnośnie bitcoina:

  • Bitcoin
    • BCHAIN_metadata
    • BCHAIN-DIFF
    • BCHAIN-HRATE
    • BCHAIN-MKPRU
    • BCHAIN-TRVOU
setwd("D:\\studia\\ZED\\projekt\\Data pack\\Bitcoin")
bchain_metadata  <- read.csv(file = "BCHAIN_metadata.csv")
bchain_diff <- read.csv(file = "BCHAIN-DIFF.csv")
bchain_hrate <- read.csv(file = "BCHAIN-HRATE.csv")
bchain_mkpru <- read.csv(file = "BCHAIN-MKPRU.csv")
bchain_trvou <- read.csv(file = "BCHAIN-TRVOU.csv")

1.2 Ceny złota

summary(goldPrice)
##      Date              USD..AM.          USD..PM.          GBP..AM.      
##  Length:13585       Min.   :  34.77   Min.   :  34.75   Min.   :  14.48  
##  Class :character   1st Qu.: 280.50   1st Qu.: 281.50   1st Qu.: 177.71  
##  Mode  :character   Median : 383.32   Median : 383.50   Median : 234.51  
##                     Mean   : 575.20   Mean   : 576.62   Mean   : 370.84  
##                     3rd Qu.: 841.94   3rd Qu.: 851.50   3rd Qu.: 454.32  
##                     Max.   :2061.50   Max.   :2067.15   Max.   :1574.37  
##                     NA's   :1         NA's   :143       NA's   :11       
##     GBP..PM.         EURO..AM.        EURO..PM.     
##  Min.   :  14.48   Min.   : 237.3   Min.   : 236.7  
##  1st Qu.: 178.23   1st Qu.: 335.3   1st Qu.: 335.2  
##  Median : 234.96   Median : 892.6   Median : 896.1  
##  Mean   : 371.81   Mean   : 797.3   Mean   : 797.2  
##  3rd Qu.: 456.43   3rd Qu.:1114.1   3rd Qu.:1114.9  
##  Max.   :1569.59   Max.   :1743.8   Max.   :1743.4  
##  NA's   :154       NA's   :7837     NA's   :7880

Czyszczenie danych Na tym etapie uznałem że będę używał średnią cenę złota w danym dniu, jeżeli ceny nie będzie z rana lub wieczora będzie brana ta która jest.

gp<- goldPrice %>% 
    mutate(Date=as.Date(Date,format="%Y-%m-%d")) %>% 
    mutate(usd=
        ifelse(is.na(USD..AM.), USD..PM.,
            ifelse(is.na(USD..PM.), USD..AM.,
                round((USD..AM.+USD..PM.)/2.0,digits=2)
            )
        ),
        gbp=
        ifelse(is.na(GBP..AM.), GBP..PM.,
            ifelse(is.na(GBP..PM.), GBP..AM.,
                round((GBP..AM.+GBP..PM.)/2.0,digits=2)
            )
        ),
        euro=
        ifelse(is.na(EURO..AM.), EURO..PM.,
            ifelse(is.na(EURO..PM.), EURO..AM.,
                round((EURO..AM.+EURO..PM.)/2.0,digits=2)
            )
        )
    ) %>% 
    rename(g_date=Date, g_usd=usd, g_gbp=gbp,g_euro=euro) %>%
    select(g_date,g_usd,g_gbp,g_euro)

summary(gp)
##      g_date               g_usd             g_gbp             g_euro      
##  Min.   :1968-01-02   Min.   :  34.76   Min.   :  14.48   Min.   : 237.0  
##  1st Qu.:1981-06-10   1st Qu.: 280.28   1st Qu.: 177.71   1st Qu.: 335.2  
##  Median :1994-11-14   Median : 383.38   Median : 234.51   Median : 894.7  
##  Mean   :1994-11-16   Mean   : 575.07   Mean   : 370.78   Mean   : 797.3  
##  3rd Qu.:2008-04-23   3rd Qu.: 841.00   3rd Qu.: 454.80   3rd Qu.:1114.7  
##  Max.   :2021-09-29   Max.   :2058.15   Max.   :1566.94   Max.   :1736.2  
##                                         NA's   :11        NA's   :7837
ggplot(data=gp, aes(g_date)) + 
  geom_line(aes(y = g_usd, colour = "g_usd")) + 
  geom_line(aes(y = g_euro, colour = "g_euro")) + 
  geom_line(aes(y = g_gbp, colour = "g_gbp"))

1.3 Kursy walut

colnames(currencyExchangeRates)
##  [1] "Date"                       "Algerian.Dinar"            
##  [3] "Australian.Dollar"          "Bahrain.Dinar"             
##  [5] "Bolivar.Fuerte"             "Botswana.Pula"             
##  [7] "Brazilian.Real"             "Brunei.Dollar"             
##  [9] "Canadian.Dollar"            "Chilean.Peso"              
## [11] "Chinese.Yuan"               "Colombian.Peso"            
## [13] "Czech.Koruna"               "Danish.Krone"              
## [15] "Euro"                       "Hungarian.Forint"          
## [17] "Icelandic.Krona"            "Indian.Rupee"              
## [19] "Indonesian.Rupiah"          "Iranian.Rial"              
## [21] "Israeli.New.Sheqel"         "Japanese.Yen"              
## [23] "Kazakhstani.Tenge"          "Korean.Won"                
## [25] "Kuwaiti.Dinar"              "Libyan.Dinar"              
## [27] "Malaysian.Ringgit"          "Mauritian.Rupee"           
## [29] "Mexican.Peso"               "Nepalese.Rupee"            
## [31] "New.Zealand.Dollar"         "Norwegian.Krone"           
## [33] "Nuevo.Sol"                  "Pakistani.Rupee"           
## [35] "Peso.Uruguayo"              "Philippine.Peso"           
## [37] "Polish.Zloty"               "Qatar.Riyal"               
## [39] "Rial.Omani"                 "Russian.Ruble"             
## [41] "Saudi.Arabian.Riyal"        "Singapore.Dollar"          
## [43] "South.African.Rand"         "Sri.Lanka.Rupee"           
## [45] "Swedish.Krona"              "Swiss.Franc"               
## [47] "Thai.Baht"                  "Trinidad.And.Tobago.Dollar"
## [49] "Tunisian.Dinar"             "U.A.E..Dirham"             
## [51] "U.K..Pound.Sterling"        "U.S..Dollar"
nrow(currencyExchangeRates)
## [1] 5978
summary(currencyExchangeRates)
##      Date           Algerian.Dinar   Australian.Dollar Bahrain.Dinar  
##  Length:5978        Min.   : 71.29   Min.   :0.4833    Min.   :0.376  
##  Class :character   1st Qu.: 77.50   1st Qu.:0.6654    1st Qu.:0.376  
##  Mode  :character   Median : 81.28   Median :0.7595    Median :0.376  
##                     Mean   : 90.59   Mean   :0.7683    Mean   :0.376  
##                     3rd Qu.:108.88   3rd Qu.:0.8689    3rd Qu.:0.376  
##                     Max.   :115.58   Max.   :1.1055    Max.   :0.376  
##                     NA's   :4112     NA's   :263       NA's   :69     
##  Bolivar.Fuerte     Botswana.Pula    Brazilian.Real  Brunei.Dollar  
##  Min.   :    2.14   Min.   :0.0855   Min.   :0.832   Min.   :1.000  
##  1st Qu.:    2.59   1st Qu.:0.1197   1st Qu.:1.709   1st Qu.:1.348  
##  Median :    6.28   Median :0.1528   Median :2.048   Median :1.468  
##  Mean   :  835.09   Mean   :0.1965   Mean   :2.161   Mean   :1.508  
##  3rd Qu.:    6.28   3rd Qu.:0.1844   3rd Qu.:2.794   3rd Qu.:1.698  
##  Max.   :68827.50   Max.   :4.8414   Max.   :4.195   Max.   :1.851  
##  NA's   :3664       NA's   :1275     NA's   :539     NA's   :1246   
##  Canadian.Dollar  Chilean.Peso    Chinese.Yuan   Colombian.Peso  
##  Min.   :0.917   Min.   :377.5   Min.   :6.093   Min.   : 833.2  
##  1st Qu.:1.086   1st Qu.:503.5   1st Qu.:6.495   1st Qu.:1786.0  
##  Median :1.297   Median :538.6   Median :6.989   Median :2017.6  
##  Mean   :1.268   Mean   :561.8   Mean   :7.316   Mean   :2073.1  
##  3rd Qu.:1.409   3rd Qu.:619.8   3rd Qu.:8.277   3rd Qu.:2482.9  
##  Max.   :1.613   Max.   :758.2   Max.   :8.746   Max.   :3434.9  
##  NA's   :356     NA's   :1220    NA's   :1316    NA's   :582     
##   Czech.Koruna    Danish.Krone        Euro        Hungarian.Forint
##  Min.   :14.45   Min.   :4.665   Min.   :0.8252   Min.   :144.1   
##  1st Qu.:19.35   1st Qu.:5.612   1st Qu.:1.0889   1st Qu.:202.7   
##  Median :21.88   Median :6.051   Median :1.2295   Median :224.3   
##  Mean   :22.95   Mean   :6.281   Mean   :1.2076   Mean   :231.1   
##  3rd Qu.:24.94   3rd Qu.:6.805   3rd Qu.:1.3338   3rd Qu.:267.6   
##  Max.   :40.29   Max.   :9.006   Max.   :1.5990   Max.   :318.7   
##  NA's   :1850    NA's   :251     NA's   :1070     NA's   :1415    
##  Icelandic.Krona   Indian.Rupee   Indonesian.Rupiah  Iranian.Rial  
##  Min.   : 54.72   Min.   :31.37   Min.   : 2201     Min.   : 1699  
##  1st Qu.: 70.28   1st Qu.:42.82   1st Qu.: 8855     1st Qu.: 1755  
##  Median : 83.48   Median :45.92   Median : 9260     Median : 8992  
##  Mean   : 92.46   Mean   :48.02   Mean   : 9144     Mean   :10718  
##  3rd Qu.:117.15   3rd Qu.:52.33   3rd Qu.:11380     3rd Qu.:11180  
##  Max.   :147.98   Max.   :68.78   Max.   :14850     Max.   :42000  
##  NA's   :354      NA's   :429     NA's   :1492      NA's   :1312   
##  Israeli.New.Sheqel  Japanese.Yen    Kazakhstani.Tenge   Korean.Won  
##  Min.   :3.230      Min.   : 75.86   Min.   :117.2     Min.   : 756  
##  1st Qu.:3.676      1st Qu.:100.70   1st Qu.:145.4     1st Qu.:1013  
##  Median :3.882      Median :109.39   Median :150.3     Median :1122  
##  Mean   :4.003      Mean   :107.97   Mean   :185.6     Mean   :1100  
##  3rd Qu.:4.370      3rd Qu.:118.38   3rd Qu.:185.7     3rd Qu.:1186  
##  Max.   :4.994      Max.   :147.00   Max.   :383.9     Max.   :1965  
##  NA's   :1939       NA's   :316      NA's   :3051      NA's   :601   
##  Kuwaiti.Dinar     Libyan.Dinar   Malaysian.Ringgit Mauritian.Rupee
##  Min.   :0.2646   Min.   :0.525   Min.   :2.436     Min.   :25.15  
##  1st Qu.:0.2854   1st Qu.:0.662   1st Qu.:3.188     1st Qu.:29.12  
##  Median :0.2947   Median :1.932   Median :3.676     Median :30.67  
##  Mean   :0.2936   Mean   :1.510   Mean   :3.508     Mean   :31.03  
##  3rd Qu.:0.3027   3rd Qu.:1.932   3rd Qu.:3.800     3rd Qu.:32.89  
##  Max.   :0.3089   Max.   :1.932   Max.   :4.725     Max.   :36.50  
##  NA's   :1054     NA's   :123     NA's   :301       NA's   :2460   
##   Mexican.Peso    Nepalese.Rupee   New.Zealand.Dollar Norwegian.Krone
##  Min.   : 5.915   Min.   : 49.88   Min.   :0.3927     Min.   :4.959  
##  1st Qu.:10.953   1st Qu.: 68.33   1st Qu.:0.5813     1st Qu.:6.104  
##  Median :12.680   Median : 74.04   Median :0.6844     Median :6.709  
##  Mean   :13.116   Mean   : 77.37   Mean   :0.6606     Mean   :6.965  
##  3rd Qu.:13.668   3rd Qu.: 86.80   3rd Qu.:0.7364     3rd Qu.:7.806  
##  Max.   :21.908   Max.   :109.98   Max.   :0.8822     Max.   :9.606  
##  NA's   :2266     NA's   :479      NA's   :310        NA's   :291    
##    Nuevo.Sol     Pakistani.Rupee  Peso.Uruguayo   Philippine.Peso
##  Min.   :2.539   Min.   : 30.88   Min.   : 9.32   Min.   :24.55  
##  1st Qu.:2.755   1st Qu.: 51.79   1st Qu.:20.07   1st Qu.:43.18  
##  Median :2.819   Median : 60.75   Median :22.94   Median :44.40  
##  Mean   :2.960   Mean   : 70.24   Mean   :24.11   Mean   :45.01  
##  3rd Qu.:3.243   3rd Qu.: 94.29   3rd Qu.:28.44   3rd Qu.:47.10  
##  Max.   :3.522   Max.   :115.70   Max.   :32.53   Max.   :52.35  
##  NA's   :4297    NA's   :488      NA's   :4287    NA's   :4198   
##   Polish.Zloty    Qatar.Riyal     Rial.Omani     Russian.Ruble  
##  Min.   :2.022   Min.   :3.64   Min.   :0.3845   Min.   :23.13  
##  1st Qu.:3.033   1st Qu.:3.64   1st Qu.:0.3845   1st Qu.:28.27  
##  Median :3.290   Median :3.64   Median :0.3845   Median :30.54  
##  Mean   :3.365   Mean   :3.64   Mean   :0.3845   Mean   :36.91  
##  3rd Qu.:3.822   3rd Qu.:3.64   3rd Qu.:0.3845   3rd Qu.:36.20  
##  Max.   :4.500   Max.   :3.64   Max.   :0.3845   Max.   :83.59  
##  NA's   :1765    NA's   :47     NA's   :56       NA's   :2435   
##  Saudi.Arabian.Riyal Singapore.Dollar South.African.Rand Sri.Lanka.Rupee 
##  Min.   :3.745       Min.   :1.201    Min.   : 3.530     Min.   : 49.57  
##  1st Qu.:3.745       1st Qu.:1.361    1st Qu.: 6.213     1st Qu.: 77.54  
##  Median :3.750       Median :1.444    Median : 7.480     Median :103.99  
##  Mean   :3.749       Mean   :1.503    Mean   : 8.113     Mean   :102.19  
##  3rd Qu.:3.750       3rd Qu.:1.687    3rd Qu.: 9.995     3rd Qu.:126.29  
##  Max.   :3.750       Max.   :1.851    Max.   :16.771     Max.   :157.65  
##  NA's   :46          NA's   :259      NA's   :535        NA's   :509     
##  Swedish.Krona     Swiss.Franc       Thai.Baht     Trinidad.And.Tobago.Dollar
##  Min.   : 5.843   Min.   :0.7253   Min.   :24.44   Min.   :5.839             
##  1st Qu.: 6.838   1st Qu.:0.9777   1st Qu.:31.50   1st Qu.:6.260             
##  Median : 7.618   Median :1.1878   Median :34.65   Median :6.282             
##  Mean   : 7.741   Mean   :1.2090   Mean   :35.14   Mean   :6.310             
##  3rd Qu.: 8.384   3rd Qu.:1.3903   3rd Qu.:39.45   3rd Qu.:6.382             
##  Max.   :10.995   Max.   :1.8228   Max.   :56.06   Max.   :6.789             
##  NA's   :349      NA's   :239      NA's   :565     NA's   :657               
##  Tunisian.Dinar  U.A.E..Dirham   U.K..Pound.Sterling  U.S..Dollar
##  Min.   :1.342   Min.   :3.671   Min.   :1.213       Min.   :1   
##  1st Qu.:1.566   1st Qu.:3.672   1st Qu.:1.519       1st Qu.:1   
##  Median :1.723   Median :3.672   Median :1.599       Median :1   
##  Mean   :1.850   Mean   :3.672   Mean   :1.615       Mean   :1   
##  3rd Qu.:2.157   3rd Qu.:3.672   3rd Qu.:1.676       3rd Qu.:1   
##  Max.   :2.509   Max.   :3.675   Max.   :2.102       Max.   :1   
##  NA's   :4258    NA's   :71      NA's   :122
cer <- currencyExchangeRates %>%
  mutate(Date=as.Date(Date,format="%Y-%m-%d"))%>%
  gather(key="currency", value="value", 2:52) %>%
  filter(!is.na(value))

summary(cer)
##       Date              currency             value         
##  Min.   :1995-01-02   Length:243689      Min.   :    0.09  
##  1st Qu.:2002-03-01   Class :character   1st Qu.:    1.44  
##  Median :2008-01-10   Mode  :character   Median :    5.65  
##  Mean   :2007-08-01                      Mean   :  485.89  
##  3rd Qu.:2013-04-12                      3rd Qu.:   57.11  
##  Max.   :2018-05-02                      Max.   :68827.50
cerTmp<-cer %>% filter(currency %in% c("Euro","Polish.Zloty","Japanese.Yen","U.S..Dollar"))
ggplot(data=cerTmp,aes(x=Date, y=value))+geom_line()+ facet_wrap(~currency, scales = "free", ncol = 2)

1.4 Indeks giełdowy Standard & Poor’s

spComposite <- spComposite %>%
  mutate(Year=as.Date(Year,format="%Y-%m-%d")) %>%
  arrange(Year)

summary(spComposite)
##       Year            S.P.Composite         Dividend          Earnings       
##  Min.   :1871-01-31   Min.   :   2.730   Min.   : 0.1800   Min.   :  0.1600  
##  1st Qu.:1908-10-07   1st Qu.:   7.902   1st Qu.: 0.4202   1st Qu.:  0.5608  
##  Median :1946-06-15   Median :  17.370   Median : 0.8717   Median :  1.4625  
##  Mean   :1946-06-15   Mean   : 327.968   Mean   : 6.7321   Mean   : 15.3714  
##  3rd Qu.:1984-02-21   3rd Qu.: 164.400   3rd Qu.: 7.0525   3rd Qu.: 14.7258  
##  Max.   :2021-10-31   Max.   :4493.280   Max.   :59.6800   Max.   :158.7400  
##                                          NA's   :4         NA's   :4         
##       CPI         Long.Interest.Rate   Real.Price     Real.Dividend   
##  Min.   :  6.28   Min.   : 0.620     Min.   :  73.9   Min.   : 5.445  
##  1st Qu.: 10.20   1st Qu.: 3.171     1st Qu.: 186.6   1st Qu.: 9.417  
##  Median : 20.35   Median : 3.815     Median : 283.3   Median :14.411  
##  Mean   : 62.39   Mean   : 4.504     Mean   : 622.0   Mean   :17.498  
##  3rd Qu.:102.28   3rd Qu.: 5.139     3rd Qu.: 707.0   3rd Qu.:22.301  
##  Max.   :273.98   Max.   :15.320     Max.   :4477.2   Max.   :63.511  
##                                                       NA's   :4       
##  Real.Earnings     Cyclically.Adjusted.PE.Ratio
##  Min.   :  4.576   Min.   : 4.784              
##  1st Qu.: 14.063   1st Qu.:11.898              
##  Median : 23.524   Median :16.381              
##  Mean   : 34.907   Mean   :17.215              
##  3rd Qu.: 43.768   3rd Qu.:20.913              
##  Max.   :159.504   Max.   :44.198              
##  NA's   :4         NA's   :120
colnames(spComposite)
##  [1] "Year"                         "S.P.Composite"               
##  [3] "Dividend"                     "Earnings"                    
##  [5] "CPI"                          "Long.Interest.Rate"          
##  [7] "Real.Price"                   "Real.Dividend"               
##  [9] "Real.Earnings"                "Cyclically.Adjusted.PE.Ratio"
head(spComposite)
## # A tibble: 6 x 10
##   Year       S.P.Composite Dividend Earnings   CPI Long.Interest.Rate Real.Price
##   <date>             <dbl>    <dbl>    <dbl> <dbl>              <dbl>      <dbl>
## 1 1871-01-31          4.44     0.26      0.4  12.5               5.32       97.3
## 2 1871-02-28          4.5      0.26      0.4  12.8               5.32       95.6
## 3 1871-03-31          4.61     0.26      0.4  13.0               5.33       96.6
## 4 1871-04-30          4.74     0.26      0.4  12.6               5.33      103. 
## 5 1871-05-31          4.86     0.26      0.4  12.3               5.33      108. 
## 6 1871-06-30          4.82     0.26      0.4  12.1               5.34      109. 
## # ... with 3 more variables: Real.Dividend <dbl>, Real.Earnings <dbl>,
## #   Cyclically.Adjusted.PE.Ratio <dbl>
spComposite <- spComposite %>% fill(names(.),.direction="updown")

summary(spComposite)
##       Year            S.P.Composite         Dividend          Earnings       
##  Min.   :1871-01-31   Min.   :   2.730   Min.   : 0.1800   Min.   :  0.1600  
##  1st Qu.:1908-10-07   1st Qu.:   7.902   1st Qu.: 0.4210   1st Qu.:  0.5637  
##  Median :1946-06-15   Median :  17.370   Median : 0.8833   Median :  1.4760  
##  Mean   :1946-06-15   Mean   : 327.968   Mean   : 6.8451   Mean   : 15.6882  
##  3rd Qu.:1984-02-21   3rd Qu.: 164.400   3rd Qu.: 7.1425   3rd Qu.: 14.7525  
##  Max.   :2021-10-31   Max.   :4493.280   Max.   :59.6800   Max.   :158.7400  
##       CPI         Long.Interest.Rate   Real.Price     Real.Dividend   
##  Min.   :  6.28   Min.   : 0.620     Min.   :  73.9   Min.   : 5.445  
##  1st Qu.: 10.20   1st Qu.: 3.171     1st Qu.: 186.6   1st Qu.: 9.423  
##  Median : 20.35   Median : 3.815     Median : 283.3   Median :14.418  
##  Mean   : 62.39   Mean   : 4.504     Mean   : 622.0   Mean   :17.588  
##  3rd Qu.:102.28   3rd Qu.: 5.139     3rd Qu.: 707.0   3rd Qu.:22.363  
##  Max.   :273.98   Max.   :15.320     Max.   :4477.2   Max.   :63.511  
##  Real.Earnings     Cyclically.Adjusted.PE.Ratio
##  Min.   :  4.576   Min.   : 4.784              
##  1st Qu.: 14.074   1st Qu.:12.227              
##  Median : 23.546   Median :16.871              
##  Mean   : 35.182   Mean   :17.298              
##  3rd Qu.: 43.819   3rd Qu.:20.478              
##  Max.   :159.504   Max.   :44.198
spComposite <- spComposite%>%
  mutate(month = format(Year, "%m"), year = format(Year, "%Y"))%>%
  select(-c('Year'))

head(spComposite)
## # A tibble: 6 x 11
##   S.P.Composite Dividend Earnings   CPI Long.Interest.Rate Real.Price
##           <dbl>    <dbl>    <dbl> <dbl>              <dbl>      <dbl>
## 1          4.44     0.26      0.4  12.5               5.32       97.3
## 2          4.5      0.26      0.4  12.8               5.32       95.6
## 3          4.61     0.26      0.4  13.0               5.33       96.6
## 4          4.74     0.26      0.4  12.6               5.33      103. 
## 5          4.86     0.26      0.4  12.3               5.33      108. 
## 6          4.82     0.26      0.4  12.1               5.34      109. 
## # ... with 5 more variables: Real.Dividend <dbl>, Real.Earnings <dbl>,
## #   Cyclically.Adjusted.PE.Ratio <dbl>, month <chr>, year <chr>
ggplot(data=spComposite, aes(x=make_date(year=year, month=month),y=Earnings)) + geom_line() 

ggplot(data=spComposite, aes(x=make_date(year=year, month=month),y=CPI)) + geom_line()

tmpdf <- spComposite %>% select(-c(month,year))
corr <- round(cor(tmpdf), 1)
ggcorrplot(corr, type = "lower", lab = TRUE)

1.5 Światowe wskaźniki rozwoju

 colnames(worldDevelopmentIndicators)
##  [1] "Country Name"  "Country Code"  "Series Name"   "Series Code"  
##  [5] "1970 [YR1970]" "1971 [YR1971]" "1972 [YR1972]" "1973 [YR1973]"
##  [9] "1974 [YR1974]" "1975 [YR1975]" "1976 [YR1976]" "1977 [YR1977]"
## [13] "1978 [YR1978]" "1979 [YR1979]" "1980 [YR1980]" "1981 [YR1981]"
## [17] "1982 [YR1982]" "1983 [YR1983]" "1984 [YR1984]" "1985 [YR1985]"
## [21] "1986 [YR1986]" "1987 [YR1987]" "1988 [YR1988]" "1989 [YR1989]"
## [25] "1990 [YR1990]" "1991 [YR1991]" "1992 [YR1992]" "1993 [YR1993]"
## [29] "1994 [YR1994]" "1995 [YR1995]" "1996 [YR1996]" "1997 [YR1997]"
## [33] "1998 [YR1998]" "1999 [YR1999]" "2000 [YR2000]" "2001 [YR2001]"
## [37] "2002 [YR2002]" "2003 [YR2003]" "2004 [YR2004]" "2005 [YR2005]"
## [41] "2006 [YR2006]" "2007 [YR2007]" "2008 [YR2008]" "2009 [YR2009]"
## [45] "2010 [YR2010]" "2011 [YR2011]" "2012 [YR2012]" "2013 [YR2013]"
## [49] "2014 [YR2014]" "2015 [YR2015]" "2016 [YR2016]" "2017 [YR2017]"
## [53] "2018 [YR2018]" "2019 [YR2019]" "2020 [YR2020]"
wdi <- gather(worldDevelopmentIndicators,key="year", value="developmentIndicators", 5:55) %>%
  mutate(year = substr(year,1,4)) %>%
  filter(developmentIndicators!="..") %>%
  mutate_at("developmentIndicators", as.numeric) %>%
  mutate_at("year", as.numeric) %>%
  #filter(year>2000) %>%
  rename(countryCode="Country Code") %>%
  rename(indicator="Series Code") %>%
  rename(seriesName="Series Name")
  #filter(indicator %in% c("SP.URB.TOTL.IN.ZS","SH.STA.SUIC.P5","CM.MKT.TRAD.CD","NY.GDP.MKTP.CD"))
  
wdi_tmp <-wdi %>% filter(countryCode %in% c("DEU","USA","GBR","JPN","RUS","IDN","POL","WLD","CHN"))

summary(wdi_tmp)
##  Country Name       countryCode         seriesName         indicator        
##  Length:59534       Length:59534       Length:59534       Length:59534      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       year      developmentIndicators
##  Min.   :1970   Min.   :-4.813e+14   
##  1st Qu.:1987   1st Qu.: 8.000e+00   
##  Median :2000   Median : 4.100e+01   
##  Mean   :1998   Mean   : 2.806e+12   
##  3rd Qu.:2010   3rd Qu.: 4.643e+05   
##  Max.   :2020   Max.   : 7.614e+15
z <- translate%>%select("Indicator Name")
paged_table(z, options = list(cols.print = 10,cols.min.print=1))
gdpchart <- wdi_tmp %>% filter(indicator =="NY.GDP.MKTP.CD" & countryCode != "WLD")
xyplot(developmentIndicators ~ year | countryCode, data = gdpchart,layout=c(4,2), 
       grid = TRUE, main="GDP (current US$)")

gdpchart <- wdi_tmp %>% filter(indicator =="SP.URB.TOTL.IN.ZS" & countryCode != "WLD")
xyplot(developmentIndicators ~ year | countryCode, data = gdpchart,layout=c(4,2), 
       grid = TRUE, main="Urban population (% of total population)")

gdpchart <- wdi_tmp %>% filter(indicator =="SH.STA.SUIC.P5" & countryCode != "WLD")
xyplot(developmentIndicators ~ year | countryCode, data = gdpchart,layout=c(4,2), 
       grid = TRUE, main="Suicide mortality rate (per 100,000 population)")

gdpchart <- wdi_tmp %>% filter(indicator =="CM.MKT.TRAD.CD" & countryCode != "WLD")
xyplot(developmentIndicators ~ year | countryCode, data = gdpchart,layout=c(4,2), 
       grid = TRUE, main="Stocks traded, total value (current US$)")

1.6 Bitcoin

bchain_metadata %>%
  filter(code %in% c("DIFF", "HRATE", "MKPRU","TRVOU")) %>% 
  select(code, name)
##    code                              name
## 1  DIFF                Bitcoin Difficulty
## 2 HRATE                 Bitcoin Hash Rate
## 3 MKPRU          Bitcoin Market Price USD
## 4 TRVOU Bitcoin USD Exchange Trade Volume
summary(bchain_mkpru)
##      Date               Value        
##  Length:4661        Min.   :    0.0  
##  Class :character   1st Qu.:    7.2  
##  Mode  :character   Median :  431.9  
##                     Mean   : 5141.2  
##                     3rd Qu.: 6499.1  
##                     Max.   :63554.4
bchain_mkpru<- bchain_mkpru %>% 
  mutate(Date=as.Date(Date,format="%Y-%m-%d"))%>%
  filter(Value!=0)
gg <- ggplot(data=bchain_mkpru, aes(x=Date,y=Value)) + geom_line() 

ggplotly(gg)

2 Badanie powiązań

2.1 Ceny złota a cena bitcoina

df <- bchain_mkpru %>% left_join(gp,c("Date"="g_date")) %>%
  select(Date, Value, g_usd)%>%
  filter(!is.na(Value) & !is.na(g_usd))

df2 <- df%>%
  mutate(month = format(Date, "%m"), year = format(Date, "%Y")) %>%
  group_by(month, year) %>%
  summarise_at(c("g_usd","Value"),mean, na.rm = TRUE) %>%
  rename(avgGold=g_usd,avgBit=Value)%>%
  filter(avgGold!=0 & avgBit!=0)%>%
  mutate(date = make_date(year=year, month=month))
  
gg <- ggplot(df2, aes(x=avgGold, y=avgBit,frame=year))+ geom_point()
ggplotly(gg)
coeff <- 40
goldColor <-"green"
bitcoinColor<-"red"

ggplot(df, aes(x=Date))+
  geom_line(aes(y=g_usd), color=goldColor) +
  geom_line(aes(y=Value/coeff), color=bitcoinColor) +
  scale_y_continuous(
    name = "cena złota",
    sec.axis = sec_axis( trans=~.*coeff,name="cena bitcoina")
  ) +
  theme(
    axis.title.y = element_text(color = goldColor, size=13),
    axis.title.y.right = element_text(color = bitcoinColor, size=13)
  )+
  xlim(as.Date("2017-01-01",format="%Y-%m-%d"),as.Date("2021-09-29",format="%Y-%m-%d"))

df1 <- gp %>% select(g_usd,g_date) %>% rename(Date=g_date)
df2 <- df1%>% inner_join(bchain_mkpru)%>%
  group_by(year =year(Date)) %>%
  summarize(corel=cor(g_usd,Value))

ggplot(data=df2, aes(x=as.character(year), y=corel)) +
  xlab("year")+
  ylab("correlation")+
  geom_bar(stat="identity", width=0.2)

2.2 Cena złota a waluty światowe

gp_tmp <- gp %>% select(g_date, g_usd) %>% rename(Date=g_date, Value=g_usd)
currency <- unlist(unique(cer[c("currency")]))

experiment <- data.frame(indicator=c(),corelation=c())

for(i in currency){
  tmp <- cer%>%filter(currency==i)%>%
   inner_join(gp_tmp)%>%drop_na(value,Value) 
  
  corelation <- cor(tmp[c("value")],tmp[c("Value")])
  
  tmp<-data.frame(i,corelation)
  colnames(tmp)<-c("currency","corelation")
  rownames(tmp) <- NULL
  experiment<- rbind(experiment,tmp)
}

e<-experiment %>% arrange(desc(corelation))
prettyTable(e)

2.3 Cena złota a wskaźniki światowe

gpTmp <-gp %>% 
  mutate(year = format(g_date, "%Y")) %>%
  group_by(year) %>%
  summarise_at(vars(g_usd),list(avg = mean))%>%
  select(year,avg)%>%
  mutate_at("year", as.numeric)

wdiTmp <- wdi %>%
  filter(countryCode =="WLD")%>%
  select(year,developmentIndicators, seriesName ,indicator)

factor<- unlist(unique(wdiTmp[c("indicator")]))

experiment <- data.frame(indicator=c(),corelation=c())

for(i in factor){
  wdiInd <- wdiTmp%>% filter(indicator ==i) %>%
   inner_join(gpTmp,by="year")
  
  corelation <- cor(wdiInd[c("developmentIndicators")],wdiInd[c("avg")])
  
  tmp<-data.frame(i,corelation)
  colnames(tmp)<-c("indicator","corelation")
  rownames(tmp) <- NULL
  experiment<- rbind(experiment,tmp)

}
result1_experiment <- experiment %>% filter(corelation>0.9)
result1_experiment$description<-mapply(translateIndicator, result1_experiment$indicator)
prettyTable(result1_experiment %>% select(description, corelation))

Powyższa tabela prezentuje 19 różnych wskaźników, które mają wysoki (powyżej 0.9) współczynnik korelacji z ceną złota.

result2_experiment <- experiment %>% filter(corelation< (-0.9))
result2_experiment$description<-mapply(translateIndicator, result2_experiment$indicator)
prettyTable(result2_experiment %>% select(description, corelation))

Powyższa tabela prezentuje 11 różnych wskaźników, które mają wysoki (poniżej -0.9) współczynnik korelacji z ceną złota.

2.4 Ceny złota a ceny akcji

df1 <- gp %>% 
  select(g_date,g_usd) %>% 
  mutate(month = format(g_date, "%m"), year = format(g_date, "%Y"))%>% 
  group_by(month, year) %>%
  mutate(g_usd = na.aggregate(g_usd, FUN = mean,na.rm=TRUE))%>%
  mutate(Year = make_date(month=month,year=year))%>%
  select(Year,g_usd)

#head(df1)
#colnames(df1)
df2 <- spComposite %>%
  #select(month,year,Real.Price)%>% 
  mutate(Year = make_date(month=month,year=year))

df3 <- df2 %>%
  inner_join(df1)%>%
  mutate(month = format(Year, "%m"), year = format(Year, "%Y"))

coeff <- 1.0
goldColor <-"green"
actionsPrice<-"red"

ggplot(df3, aes(x=Year))+
  geom_line(aes(y=g_usd), color=goldColor) +
  geom_line(aes(y=Real.Price/coeff), color=actionsPrice) +
  scale_y_continuous(
    name = "cena złota",
    sec.axis = sec_axis( trans=~.*coeff,name="ceny akcji")
  ) +
  theme(
    axis.title.y = element_text(color = goldColor, size=13),
    axis.title.y.right = element_text(color = actionsPrice, size=13)
  )

x<-cor(x=df3$g_usd, y=df3[!names(df3) %in% c("Year","g_usd","month","year")], use = "complete.obs" )
x <- as.data.frame(t(x))%>% rename(corelation=V1) %>% arrange(desc(corelation))

x<-rownames_to_column(x, "NAME")
prettyTable(x)

2.5 Cena bitcoina oraz akcje spółki

df1 <- bchain_mkpru %>%
  mutate(month = format(Date, "%m"), year = format(Date, "%Y"))%>%
  group_by(month, year) %>%
  mutate(Value = na.aggregate(Value, FUN = mean,na.rm=TRUE))%>%
  mutate(Year = make_date(month=month,year=year))%>%
  select(Year,Value)%>%select(-c("month","year"))
  
df2 <- spComposite %>% mutate(Year = make_date(month=month,year=year))%>%select(-c("month","year"))

df3 <- df2 %>% inner_join(df1)%>%select(-c("month","year"))

x <- cor(x=df3$Value, y=df3[!names(df3) %in% c("Year","Value")], use = "complete.obs" )
x <- as.data.frame(t(x))%>% rename(corelation=V1) %>% arrange(desc(corelation))
 
x<-rownames_to_column(x, "NAME")
prettyTable(x)

2.6 Cena bitcoina oraz inne waluty

bp <- bchain_mkpru
currency <- unlist(unique(cer[c("currency")]))

experiment <- data.frame(indicator=c(),corelation=c())

for(i in currency){
  tmp <- cer%>%filter(currency==i)%>%
   inner_join(bp)%>%drop_na(value,Value) 
  
  corelation <- cor(tmp[c("value")],tmp[c("Value")])
  
  tmp<-data.frame(i,corelation)
  colnames(tmp)<-c("currency","corelation")
  rownames(tmp) <- NULL
  experiment<- rbind(experiment,tmp)
}

e<-experiment %>% arrange(desc(corelation))
prettyTable(e)

2.7 Cena bitcoina a wskaźniki światowe

bp <- bchain_mkpru

df2 <- bchain_mkpru%>%
  mutate(year = format(Date, "%Y")) %>%
  group_by(year) %>%
  summarise(avgBit= mean(Value)) %>%
  transform(year = as.numeric(year))

wdiTmp <- wdi %>%
  filter(countryCode =="WLD")%>%
  select(year,developmentIndicators, seriesName ,indicator)


factor<- unlist(unique(wdiTmp[c("indicator")]))
experiment <- data.frame(indicator=c(),corelation=c())

for(i in factor){
  wdiInd <- wdiTmp%>% filter(indicator ==i) %>%
   inner_join(df2,by="year")

  corelation <- cor(wdiInd[c("developmentIndicators")],wdiInd[c("avgBit")])
  tmp<-data.frame(i,corelation)
  colnames(tmp)<-c("indicator","corelation")
  rownames(tmp) <- NULL
  experiment<- rbind(experiment,tmp)
}

result3_experiment <- experiment %>% filter(corelation>0.9)
result3_experiment$description<-mapply(translateIndicator, result3_experiment$indicator)
prettyTable(result3_experiment %>% select(description, corelation))

3 Przewidywanie cen złota

df_gold <- gp %>%
  select(g_date,g_usd) %>% rename(Date=g_date)

df_stonks <- spComposite %>% 
  select(year, month, Dividend, CPI, Real.Earnings)%>%
  mutate(year=as.integer(year), month=as.integer(month))

#head(df_stonks)
prettyTable(result1_experiment) #NY.GDP.MKTP.CD
#colnames(wdi)
df_wld <- wdi %>%
  filter(countryCode=="WLD" & indicator=="NY.GDP.MKTP.CD") %>%
  rename(GPDpc=developmentIndicators)%>%
  select(GPDpc, year)


df_cur_Australian.Dollar <- cer %>% filter(currency %in% c("Australian.Dollar"))%>%
  rename(Australian.Dollar=value) %>% select(Date, Australian.Dollar)
df_cur_Brunei.Dollar <- cer %>% filter(currency %in% c("Brunei.Dollar"))%>%
  rename(Brunei.Dollar=value) %>% select(Date, Brunei.Dollar)
df_cur <- merge(df_cur_Australian.Dollar, df_cur_Brunei.Dollar, by="Date")


vanilla <- df_gold %>% inner_join((df_cur)) %>%
  mutate(month =as.integer(format(Date, "%m")), year =as.integer( format(Date, "%Y")))%>%
  inner_join(df_stonks, by = c("year" = "year", "month" = "month"))%>%
  inner_join(df_wld, by=c("year"="year")) %>%select(-c(year, month))
  
  
  
summary(vanilla)
##       Date                g_usd        Australian.Dollar Brunei.Dollar  
##  Min.   :1998-09-02   Min.   : 252.9   Min.   :0.4833    Min.   :1.000  
##  1st Qu.:2003-07-21   1st Qu.: 363.6   1st Qu.:0.6579    1st Qu.:1.347  
##  Median :2008-05-21   Median : 855.6   Median :0.7633    Median :1.464  
##  Mean   :2008-06-11   Mean   : 849.7   Mean   :0.7741    Mean   :1.507  
##  3rd Qu.:2013-05-09   3rd Qu.:1260.0   3rd Qu.:0.8954    3rd Qu.:1.698  
##  Max.   :2018-04-30   Max.   :1893.0   Max.   :1.1055    Max.   :1.850  
##     Dividend          CPI        Real.Earnings         GPDpc          
##  Min.   :15.69   Min.   :163.6   Min.   :  8.805   Min.   :3.140e+13  
##  1st Qu.:16.74   1st Qu.:184.2   1st Qu.: 65.935   1st Qu.:3.895e+13  
##  Median :24.10   Median :212.2   Median : 89.879   Median :6.044e+13  
##  Mean   :26.71   Mean   :208.5   Mean   : 84.207   Mean   :5.786e+13  
##  3rd Qu.:32.88   3rd Qu.:232.9   3rd Qu.:105.320   3rd Qu.:7.523e+13  
##  Max.   :50.33   Max.   :250.5   Max.   :128.344   Max.   :8.634e+13
count(vanilla)
## # A tibble: 1 x 1
##       n
##   <int>
## 1  4514
chocola <- vanilla %>% select(-c(Date))
set.seed(9)

inTraining <- 
    createDataPartition(
        y = chocola$g_usd,
        p = .75,
        list = FALSE)

training <- chocola[ inTraining,]
testing  <- chocola[-inTraining,]


fitControl <- trainControl(method = "repeatedcv",   
                        number = 10,     # number of folds
                        repeats = 10)    # repeated ten times

model <- train(g_usd ~ .,
               data = training,
               method = "lm",  # now we're using the lasso method
               trControl = fitControl)  

model
## Linear Regression 
## 
## 3386 samples
##    6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 10 times) 
## Summary of sample sizes: 3047, 3047, 3047, 3048, 3048, 3048, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   109.9837  0.9466706  84.53003
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
predictions <- predict(model, testing)
postResample(pred = predictions, obs = testing$g_usd)
##        RMSE    Rsquared         MAE 
## 110.0002362   0.9464649  85.2215941
tmp<- testing
tmp$pred<-predictions
tmp<-tmp%>%select(g_usd,pred)
head(tmp)
## # A tibble: 6 x 2
##   g_usd  pred
##   <dbl> <dbl>
## 1 1321. 1315.
## 2 1348. 1333.
## 3 1347. 1329.
## 4 1344. 1335.
## 5 1337. 1338.
## 6 1346. 1338.